'''
电眼看四川项目 (Electric-Eye-on-Sichuan project)
获取同花顺中各企业周的股票价格等数据
(Fetches weekly stock price data for listed companies from 10jqka / Tonghuashun)
2020年12月24日
zys
'''
# Standard library
import ast
import json
import re
import time

# Third-party
import lxml
import requests
import urllib3
import xlrd
import xlwt
import xlutils
from bs4 import BeautifulSoup
# import the copy feature from xlutils
from xlutils import copy

# The site is fetched with verify=False; silence the InsecureRequestWarning spam.
urllib3.disable_warnings()

# Request headers sent with every quote fetch.
# NOTE(review): the original dict was copied from a *response* header dump and
# included server-side headers (Age, Content-Encoding, Date, Expires,
# Last-Modified, Server, Vary, Access-Control-*) plus a bogus "Host: 2_232",
# none of which belong in a request — the fake Host header in particular can
# break virtual-host routing.  Only genuine request headers are kept.
h = {
    "Referer": "https://m.10jqka.com.cn/",
    "sec-ch-ua": "'Google Chrome';v='87', ' Not;A Brand';v='99', 'Chromium';v='87'",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "Cookie": "v=A0ogaVRVHoIxbpIuymi3ajdUmzHsO87gwL5CM9SC9X0oj-ThvMsepZBPkkCn",
}

# -------------- fetch one stock's quote feed --------------#
def _parse_jsonp(text, cV):
    """Strip the JSONP wrapper ``quotebridge_v6_line_hs_<cV>_11_last360(...)``
    from *text*, then return the semicolon-split 'data' field as a list.

    The original code used ``.replace(')', '')`` which removed EVERY closing
    parenthesis in the payload, corrupting any record containing one; here we
    strip only the exact prefix and the single trailing ``)``.
    """
    payload = text.strip()
    prefix = 'quotebridge_v6_line_hs_' + cV + '_11_last360('
    if payload.startswith(prefix):
        payload = payload[len(prefix):]
    if payload.endswith(')'):
        payload = payload[:-1]
    return json.loads(payload)['data'].split(';')


def reqHandle(url, cV):
    """Fetch *url* (the 10jqka last-360-days JS feed for stock code *cV*)
    and return its 'data' records.

    Returns:
        str: ``str(list_of_records)`` — kept as a string (not the list itself)
        for backward compatibility with callers that ``eval()`` the result.
    """
    # verify=False because the site's cert chain fails locally; warnings are
    # silenced at import time via urllib3.disable_warnings().
    html = requests.get(url, headers=h, verify=False)
    soup = BeautifulSoup(html.text, 'lxml')
    dataS = _parse_jsonp(soup.text, cV)
    return str(dataS)

# -------------- read stock codes, fetch quotes, write to excel -------------- #
def main():
    """Read stock codes from ``ssmd.xls`` (rows 3+, columns: code / short
    name / full name), fetch each code's daily quote history via
    :func:`reqHandle`, and write all records to ``ssmdResult.xls``.

    Fixes over the original:
      * output rows are tracked with a single running counter (``out_row``)
        instead of ``rowV + count``, which skipped one row per processed
        stock and left holes for blank input rows;
      * ``ast.literal_eval`` replaces ``eval`` on network-derived text —
        identical result for the ``str(list)`` payload, but it cannot
        execute arbitrary code.
    """
    workbook = xlrd.open_workbook('ssmd.xls')

    # Build a fresh output workbook with a header row.
    new_book = xlwt.Workbook()
    new_sheet = new_book.add_sheet("结果数据")
    headers = ['证券代码', '公司名称', '日期', '开盘价格', '高位价格',
               '低位价格', '收盘价格', '成交量', '成交额', '换手率']
    for col, title in enumerate(headers):
        new_sheet.write(0, col, title)

    # First sheet of the input workbook holds the code list.
    sheet = workbook.sheet_by_index(0)
    rowN = sheet.nrows

    out_row = 1  # next free output row; row 0 is the header
    for rowV in range(2, rowN):  # input data starts on the third row
        cV = sheet.cell_value(rowV, 0)      # stock code
        cVJC = sheet.cell_value(rowV, 1)    # short name (log only)
        cVName = sheet.cell_value(rowV, 2)  # company name
        if cV == '':
            continue  # skip blank input rows without leaving output gaps

        url = "https://d.10jqka.com.cn/v6/line/hs_" + cV + "/11/last360.js"
        print(cV, '-->', cVJC, '-->', cVName, '>>', url)

        # reqHandle returns str(list); literal_eval safely reconstructs it.
        dataS = ast.literal_eval(reqHandle(url, cV))
        for data in dataS:
            # record layout: date, open, high, low, close, volume, turnover, turnover-rate
            dataC = data.split(',')
            print(cV, '-->', cVJC, '-->', cVName, '>>',
                  dataC[0], '>', dataC[3], '>', url)
            new_sheet.write(out_row, 0, cV)
            new_sheet.write(out_row, 1, cVName)
            for col in range(8):
                new_sheet.write(out_row, col + 2, dataC[col])
            out_row += 1

    new_book.save('ssmdResult.xls')
    print("-----数据采集完成-----")

# Script entry point
if __name__ == '__main__':
    main()
